package com.codemes.happylist.lucene.reader.pdf;

import com.codemes.happylist.lucene.reader.FileTextReader;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import lombok.extern.slf4j.Slf4j;

import java.io.File;
import java.io.IOException;

/**
 * {@link FileTextReader} implementation that extracts the text of a PDF file
 * page by page using iText's {@link PdfReaderContentParser}.
 *
 * @author <a href="mailto:fulei@fehorizon.com">fulei</a>
 * @date 2022/8/22 18:59
 */
@Slf4j
public class PdfFileTextReader implements FileTextReader {

    /**
     * Reads the text of every page of the given PDF file and concatenates it
     * in page order.
     *
     * <p>Reading is best-effort: if an {@link IOException} occurs, the error is
     * logged and whatever text was collected before the failure is returned
     * (possibly an empty string).
     *
     * @param file the PDF file to read
     * @return the text collected from the pages that were processed
     */
    @Override
    public String readText(File file) {
        StringBuilder allContent = new StringBuilder();
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(file.getPath());
            PdfReaderContentParser contentParser = new PdfReaderContentParser(pdfReader);
            int pageCount = pdfReader.getNumberOfPages();
            // iText page numbers are 1-based, so the last page is pageCount itself.
            for (int page = 1; page <= pageCount; page++) {
                // A fresh listener per page keeps one page's text from being appended
                // again on later pages.
                // NOTE(review): assumes PdfPageContentReadListener keeps no state that
                // must carry over between pages - confirm against its implementation.
                PdfPageContentReadListener pageListener = new PdfPageContentReadListener();
                contentParser.processContent(page, pageListener);
                allContent.append(pageListener.getAllContent());
            }
        } catch (IOException e) {
            log.error("读取pdf文件内容异常 file -> {} -> ", file.getPath(), e);
        } finally {
            // Release the reader's underlying file handle on success and on failure.
            if (pdfReader != null) {
                pdfReader.close();
            }
        }

        return allContent.toString();
    }

    /**
     * Manual smoke test: prints whether the file exists, then its extracted text.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to read. When
     *             omitted, the original developer-local sample path is used.
     */
    public static void main(String[] args) {
        // NOTE(review): the fallback is a machine-specific sample path kept only for
        // backward compatibility; consider removing it from the source.
        String path = args.length > 0
                ? args[0]
                : "D:\\天翼云盘同步盘\\18914966247\\宏信\\面试简历\\测试\\测试工程师-罗小丽.pdf";
        File file = new File(path);
        System.out.println(file.exists());
        PdfFileTextReader textReader = new PdfFileTextReader();
        String content = textReader.readText(file);
        System.out.println(content);
    }
}
